---
title: "Main replication document"
subtitle: "Breaking out of Legacy Mobilization Networks: How the Internet Reaches and
Activates the Politically Disengaged"
author: "Francesco Bailo (francesco.bailo@sydney.edu.au)"
institution: "The University of Sydney"
geometry: margin=1.5cm
output:
  bookdown::pdf_document2:   
    toc: false
    keep_tex:  true
---

With the exclusion of files containing identifiable meetup data (i.e. groups, events, venues) and Itanes (third-party) survey data, data files referred to in the document and cache data files produced during by each code chunk are made available. Cache data files can be loaded with the `knitr::load_cache()` function.

```{r setup, include=FALSE}

knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, cache = TRUE, fig.asp=0.50, fig.align = 'center', dev = 'pdf', fig.width = 8)

options(scipen=999)

# Change accordingly
n_cores <- 8

```

```{r load-packages}

required_packages <- 
  c("tidyverse", "sf", "readxl", "knitr", "kableExtra", "igraph", "haven",
    "gridExtra", "viridis", "stargazer", "raster", "scales", "plyr", "cowplot",
    "egg", "spdep", "CARBayes", "coda", "zoo",
    "lme4", "jtools", "mcp")

new_packages <- 
  required_packages[!(required_packages %in% 
                       installed.packages()[,"Package"])]

if(length(new_packages)) install.packages(new_packages)

capture.output(lapply(required_packages, require, character.only = TRUE),  file='NUL')

sessionInfo()

```

```{r functions}

toNumeric <- function(x, .na = NULL) {
  require(haven)
  x <- as.numeric(x)
  x[x %in% .na] <- NA
  return(x)
}

naToZero <- function(x) {
  return(ifelse(is.na(x), 0, x))
}

shapeInt2Hex <- function(shape.sf, shape_index, shape_vars,
                         pnt.sf, pnt_var) {

  pnt.sf$pnt_var <- 
    pnt.sf[[pnt_var]]
  
  pnt.sf$pnt_var[is.na(pnt.sf$pnt_var)] <- 0

  res <-
    sf::st_within(pnt.sf,
                  shape.sf)

  res_index <-
    unlist(lapply(res, FUN = function(x) if(length(x) == 0) {return(NA)} else{return(x)}))
  
  pnt.sf$shape_index <-
    shape.sf[[shape_index]][res_index]

  for (i in 1:length(shape_vars)) {
    pnt.sf[[shape_vars[i]]] <-
      shape.sf[[shape_vars[i]]][res_index]
  }

  pnt.sf <-
    pnt.sf %>%
    dplyr::filter(!is.na(shape_index)) %>%
    dplyr::group_by(shape_index) %>%
    dplyr::mutate(shape_tot = sum(pnt_var))

  pnt.sf$shape_frac <- pnt.sf$pnt_var / pnt.sf$shape_tot

  for (i in 1:length(shape_vars)) {
    pnt.sf[[paste0(shape_vars[i], "_frac")]] <-
      pnt.sf[[shape_vars[i]]] * pnt.sf$shape_frac
  }

  return(pnt.sf)

}

```

# Data preparation

## Load data

```{r load-data}

sez2011_lonlat.df <-
  read_csv("data/istat/census_lonlat_sez_2011.csv.zip")

sez2011_pop.df <- 
  read_csv("data/istat/census_population_sez_2011.csv.zip")

sez2011_ind.df <- 
  read_csv("data/istat/census_industry_sez_2011.csv.zip")

regions.sf <- 
  read_sf("data/istat/Reg01012020_g/Reg01012020_g_WGS84.shp")

regions_den_en.df <- 
  read_csv("data/wikidata/ita_regions_den_en.csv") %>%
  dplyr::filter(label_lang == 'en')

regions.sf$DEN_REG_en <- 
  regions_den_en.df$label[match(regions.sf$COD_REG,
                                as.numeric(regions_den_en.df$istat_code))]
comuni8092.sf <-
  read_sf("data/istat/Com2011_g/Com2011_g_WGS84.shp")

comuni8101_2006.sf <- 
  read_sf("data/istat/Com01012006/Com01012006_WGS84.shp")

ge2006_comuni_votes.df <- 
  read.csv("data/min-interior/open_csv_20060409_camera_geo_count.csv")

ge2006_comuni_votes.df$PRO_COM_T <-
  sprintf("%06d", ge2006_comuni_votes.df$istat_cod)

comuni8101_2008.sf <-
  read_sf("data/istat/Com01012008/Com01012008_WGS84.shp")

ge2008_comuni_votes.df <-
  read.csv("data/min-interior/open_csv_20080413_camera_geo_count.csv")

ge2008_comuni_votes.df$PRO_COM_T <-
  sprintf("%06d", ge2008_comuni_votes.df$istat_cod)

load("data/istat/comuni7954.sf.RData")

load("data/istat/censimento_noprofit_comune.RData")

load("data/min-interior/referendum2016_turnout.RData")

ge2013_comuni_results <- 
  read_delim("data/min-interior/ge2013_comuni_results.csv.zip", 
    ";", escape_double = FALSE, trim_ws = TRUE)

```

```{r load-meetup}

# Note: Meetup data contains identifiable information and is password protected
# Request access here: https://doi.org/10.7910/DVN/MNHJTQ

load("data/meetup/meetup_event.RData") 

load("data/meetup/meetup_venue.RData") 

load("data/meetup/meetup_group.RData") 

load("data/meetup/meetup_venue_googleapi.RData")

```

```{r load-itanes}

# Note: Itanes data is third party data and password protected.  
# Request access on itanes.org

itanes_2013 <- 
  read_sav("data/itanes/ITA2013_(envers2015_01_19).sav")

```

## Define hexagonal cells

Hexagonal cells have a size of approximately 15 km.

```{r define-hex, eval = F}

regions.sf$unit <- 
  1

italy.sf <- 
  regions.sf %>%
  dplyr::group_by(unit) %>%
  dplyr::summarize(AREA = sum(SHAPE_AREA))

italy_hex.sf <-
  st_make_grid(italy.sf,
               15000,
               what = "polygons",
               square = FALSE)

italy_hex.sf <- 
  st_sf(index = 1:length(lengths(italy_hex.sf)), italy_hex.sf)

italy_hex_cropped.sf <-
  st_intersection(italy_hex.sf, 
                  italy.sf %>% st_make_valid())

italy_hex_cropped.sf$unit <- 
  NULL

italy_hex_cropped.sf$area <- 
  st_area(italy_hex_cropped.sf)

save(italy.sf, italy_hex.sf, italy_hex_cropped.sf, file = "data/out/define-hex.RData")

```

```{r load-plot-hex, fig.cap=sprintf("Hexagonal cells of size 15km (n=%s)", nrow(italy_hex_cropped.sf)), fig.width = 12}

load("data/out/define-hex.RData")

ggplot(italy_hex_cropped.sf) +
  geom_sf()

```

```{r hex-regions}

res <- 
  st_intersection(italy_hex_cropped.sf %>% st_centroid(), 
               regions.sf %>% st_make_valid())

hex_region.df <-
  data.frame(hex = italy_hex_cropped.sf$index,
             stringsAsFactors = F)

hex_region.df$COD_REG <- 
  res$COD_REG[match(hex_region.df$hex,
                    res$index)]

res <- 
  st_intersection(italy_hex.sf %>% 
                    dplyr::filter(index %in% hex_region.df$hex[is.na(hex_region.df$COD_REG)]),
                  regions.sf %>% st_make_valid()) %>%
  dplyr::mutate(intersection_area = st_area(.))

st_geometry(res) <- NULL

res <- 
  res %>%
  dplyr::group_by(index) %>%
  dplyr::summarise(COD_REG = COD_REG[which.max(intersection_area)])

hex_region.df$COD_REG[hex_region.df$hex %in% res$index] <-
  res$COD_REG

hex_region.df$DEN_REG <- 
  regions.sf$DEN_REG[match(hex_region.df$COD_REG,
                           regions.sf$COD_REG)]           
          
```

```{r load-plot-hex-region, fig.cap="Hexagonal cells and corresponding region", fig.width = 12}

italy_hex_cropped.sf %>%
  dplyr::mutate(DEN_REG = hex_region.df$DEN_REG[match(index,
                                                      hex_region.df$hex)]) %>%
ggplot() +
  geom_sf(aes(fill = factor(DEN_REG)), colour = NA) +
  labs(fill = NULL)

```

## Add census results to hexagonal cells

```{r pop-census-labelling}

pop_census_labelling <- 
  read.csv("data/istat/census_vars_2011.csv", sep = ";",
           encoding = "latin1")

pop_census_labelling %>%
  dplyr::filter(grepl("^(P\\d|ST\\d)", NOME_CAMPO)) %>%
  kable(booktabs = T) %>%
  kable_styling(latex_options = c("striped", "scale_down"))

```

```{r ind-census-labelling}

ind_census_labels.df <- 
  readxl::read_xls("data/istat/Ateco 2007 VI cifra 17 dicembre 2008 1.xls",
                   skip = 1) %>%
  dplyr::filter(CODICE %in% LETTERS[1:19]) %>%
  dplyr::select(1:2)

ind_census_labels.df %>%
  kable(booktabs = T) %>%
  kable_styling(latex_options = c("striped", "scale_down"))

```

```{r sez2011-lonlat.sf}

sez2011_lonlat.sf <- 
  st_as_sf(sez2011_lonlat.df, 
           coords = c("lon", "lat"), 
           crs = 4326)

sez2011_lonlat.sf$P1 <-
  sez2011_pop.df$P1[match(sez2011_lonlat.sf$sez2011,
                          sez2011_pop.df$sez2011)]
sez2011_lonlat.sf$cod_reg <-
  sez2011_pop.df$cod_reg[match(sez2011_lonlat.sf$sez2011,
                               sez2011_pop.df$sez2011)]

res <- 
  st_within(sez2011_lonlat.sf %>% st_transform(32632),
            italy_hex.sf)

sez2011_lonlat.sf$hex <- 
  unlist(lapply(res, 
                FUN = function(x) return(ifelse(length(x) == 0, NA, x))))

sez2011_pop.df$hex <- 
  sez2011_lonlat.sf$hex[match(sez2011_pop.df$sez2011,
                              sez2011_lonlat.sf$sez2011)]

res <- 
  st_within(sez2011_lonlat.sf %>% st_transform(32632),
            italy_hex.sf)

```

```{r add-census-to-hex}

hex_pop.df <- 
  sez2011_pop.df %>%
  dplyr::group_by(hex) %>%
  dplyr::summarise_at(.vars = vars(P1:ST15), 
                      sum, na.rm = TRUE)

sez2011_ind.df <-
  sez2011_ind.df %>%
  dplyr::mutate(ATECO2 = gsub("\\d$", "", sprintf("%03d", ATECO3)),
                ATECOsez = cut(as.numeric(ATECO2),
                               breaks = c(0, 
                                          4,
                                          10,
                                          35,
                                          36,
                                          41,
                                          45,
                                          49,
                                          55,
                                          58,
                                          64,
                                          68,
                                          69,
                                          77,
                                          84,
                                          85,
                                          86,
                                          90,
                                          94,
                                          97,
                                          99,
                                          100),
                               labels = LETTERS[1:21],
                               include.lowest = T),
                newcat = as.character(ATECOsez))

sez2011_ind.df$newcat <- 
  paste0(sez2011_ind.df$newcat, 
         "_", 
         sez2011_ind.df$TIPO_SOGGETTO)

sez2011_ind.df$newcat <-
  gsub("(_IM|_IP)", "", sez2011_ind.df$newcat)

sez2011_ind.df$newcat[grepl("[A-Z]_NP$", sez2011_ind.df$newcat)] <- "NP"

sez2011_ind.df$newcat[sez2011_ind.df$newcat == "NP" &
                        sez2011_ind.df$ATECO3 == "949"] <- "NP_949"

sez2011_ind.df$ADDETTI[is.na(sez2011_ind.df$ADDETTI)] <- 0

sez2011_ind.df$ALTRI_RETRIB[is.na(sez2011_ind.df$ALTRI_RETRIB)] <- 0

sez2011_ind.df$VOLONTARI[is.na(sez2011_ind.df$VOLONTARI)] <- 0

sez2011_ind.df$all_employeed <- 
  sez2011_ind.df$ADDETTI + 
  sez2011_ind.df$ALTRI_RETRIB + 
  sez2011_ind.df$VOLONTARI

sez2011_ind_wide.df <-
  sez2011_ind.df %>%
  dplyr::arrange(newcat) %>%
  tidyr::pivot_wider(id_cols = "SEZ2011",
                     names_from = "newcat",
                     values_from = "all_employeed",
                     values_fn = list(all_employeed = sum),
                     values_fill = list(all_employeed = 0))

sez2011_ind_wide.df$hex <- 
  sez2011_lonlat.sf$hex[match(sez2011_ind_wide.df$SEZ2011,
                              sez2011_lonlat.sf$sez2011)]

hex_ind.df <- 
  sez2011_ind_wide.df %>%
  dplyr::group_by(hex) %>%
  dplyr::summarise_at(.vars = vars(A:S), 
                      sum, na.rm = TRUE)

italy_hex_cropped.sf$P1 <- 
  hex_pop.df$P1[match(italy_hex_cropped.sf$index, 
                      hex_pop.df$hex)]

italy_hex_cropped.sf$P1[is.na(italy_hex_cropped.sf$P1)] <- 
  0

save(hex_pop.df, file = "data/out/hex_pop.df.RData")

save(hex_ind.df, file = "data/out/hex_ind.df.RData")

```

```{r load-hex-ind}

load("data/out/hex_ind.df.RData")

hex_ind.df %>%
  tidyr::pivot_longer(cols = A:S,
                      names_to = "var",
                      values_to = "value") %>%
  ggplot() +
  geom_density(aes(log(value))) +
  facet_wrap("var", scales = "free")

```

```{r hex-ranking-on-pop-density}

load("data/out/hex_pop.df.RData")

hex_pop.df$area <- 
  italy_hex_cropped.sf$area[match(hex_pop.df$hex,
                                  italy_hex_cropped.sf$index)]

hex_pop.df$density <-
  hex_pop.df$P1 / (as.numeric(max(hex_pop.df$area,na.rm=T)) * 1e-6)

hex_P1_rank.df <- 
  data.frame(hex = hex_pop.df$hex,
             density = hex_pop.df$density,
             P1 = hex_pop.df$P1) %>%
  dplyr::arrange(density)

hex_P1_rank.df$P1_cumsum <- 
  cumsum(hex_P1_rank.df$P1)

hex_P1_rank.df$P1_cut <- 
  cut(hex_P1_rank.df$P1_cumsum,
      breaks = 
        c(1,
          sum(hex_P1_rank.df$P1) * .005,
          sum(hex_P1_rank.df$P1) * .95,
          sum(hex_P1_rank.df$P1) * 1),
      labels = c("0-.5%", ".5-95%", "95-100%"),
      include.lowest = T)

hex_P1_rank.df$P1_cut_6 <- 
  cut(hex_P1_rank.df$P1_cumsum,
      breaks = 
        c(1,
          sum(hex_P1_rank.df$P1) * .075,
          sum(hex_P1_rank.df$P1) * .20,
          sum(hex_P1_rank.df$P1) * .40,
          sum(hex_P1_rank.df$P1) * .60,
          sum(hex_P1_rank.df$P1) * .95,
          sum(hex_P1_rank.df$P1) * 1),
      labels = c("1", "2", "3", "4", "5", "6"),
      include.lowest = T)

italy_hex_cropped.sf$P1_cut_6 <- 
  hex_P1_rank.df$P1_cut_6[match(italy_hex_cropped.sf$index,
                              hex_P1_rank.df$hex)]

italy_hex_cropped.sf$P1_cut_6 <- 
            dplyr::recode(italy_hex_cropped.sf$P1_cut_6, 
                   `1` = sprintf("1 = 7.5%% of pop. (%s per km2)", 
                                 format(round(median(hex_P1_rank.df$density[
                                   hex_P1_rank.df$P1_cut_6 == 1
                                 ]), 2))), 
                   `2` = sprintf("2 = 12.5%% of pop. (%s per km2)", 
                                 format(round(median(hex_P1_rank.df$density[
                                   hex_P1_rank.df$P1_cut_6 == 2
                                 ]), 2))), 
                   `3` = sprintf("3 = 20%% of pop. (%s per km2)", 
                                 format(round(median(hex_P1_rank.df$density[
                                   hex_P1_rank.df$P1_cut_6 == 3
                                 ]), 2))),
                   `4` = sprintf("4 = 20%% of pop. (%s per km2)", 
                                 format(round(median(hex_P1_rank.df$density[
                                   hex_P1_rank.df$P1_cut_6 == 4
                                 ]), 2))),
                   `5` = sprintf("5 = 35%% of pop. (%s per km2)", 
                                 format(round(median(hex_P1_rank.df$density[
                                   hex_P1_rank.df$P1_cut_6 == 5
                                 ]), 2))),
                   `6` = sprintf("6 = 5%% of pop. (%s per km2)", 
                                 format(round(median(hex_P1_rank.df$density[
                                   hex_P1_rank.df$P1_cut_6 == 6
                                 ]), 2))))

italy_hex_cropped.sf$P1 <- 
  hex_pop.df$P1[match(italy_hex_cropped.sf$index,
                      hex_pop.df$hex)]

```

```{r prepare-hex-census-df}

load("data/out/hex_ind.df.RData")

load("data/out/hex_pop.df.RData")

hex_census.df <- 
  merge(hex_ind.df,
        hex_pop.df %>%
          dplyr::select(hex,
                        P1,P17:P29,
                        P47,P60,P61,P62,
                        P128,P130,P131,
                        P137,P138,P139,
                        ST9:ST13),
        by = "hex",
        all = T)

hex_census.df <- 
  hex_census.df %>%
  dplyr::mutate(pop_over20 = P18+P19+P20+P21+P22+P23+P24+P25+P26+P27+P28+P29,
                pop_ind = A+B+C+D+E+`F`+G+H+I+J+K+L+M+N+O+P+Q+R+S)

```

```{r plot-hex-pop-density, fig.cap = "Log-transformed population (Census 2011)", fig.width = 12}

ggplot() +
  geom_sf(data = regions.sf, fill = NA, size = .05, alpha = .5) +
  geom_sf(data = italy_hex_cropped.sf,
          aes(fill = log(P1)), colour = NA) +
  scale_fill_viridis_b() +
  labs(fill = "Population (log-transformed)")

```

```{r plot-hex-pop-cuts, fig.cap = "Population density ordinal variable", fig.width = 20}

ggplot() +
  geom_sf(data = regions.sf, fill = NA, size = .05, alpha = .5) +
  geom_sf(data = 
            italy_hex_cropped.sf,
          fill = 'black') +
  scale_fill_brewer(palette = "Set1") +
  facet_wrap(vars(P1_cut_6), ncol = 3) +
  guides(fill = "none")

```

```{r fig.cap = "Population density of hexagonal cells determining ordinal variable"}

hex_P1_rank.df %>%
  dplyr::mutate(`density rank` = order(P1)) %>%
  ggplot(aes(y = P1_cumsum / sum(P1), x = `density rank`)) +
  geom_point(aes(colour = P1_cut_6)) +
  scale_x_continuous(breaks = c(0, 500, 864, 1000, 1500)) +
  scale_y_continuous(breaks = c(0, .075, .20, .40, .60, .95, 1),
                     labels = c("0%", "7.5%", "20%", "40%", "60%", "95%", "100%")) +
  geom_hline(yintercept = c(0, .075, .20, .40, .60, .95, 1), colour = 'gray') +
  labs(y = "Cumulative percentage of total population", colour = "Ordinal variable")
  
```

## Add vote data to hexagon cells

### 2006

```{r national-turnout-2006}

comuni8101_2006.sf$registered_voters <- 
  ge2006_comuni_votes.df$registered_voters[match(comuni8101_2006.sf$PRO_COM_T,
                                                 ge2006_comuni_votes.df$PRO_COM_T)]

comuni8101_2006.sf$effective_voters <- 
  ge2006_comuni_votes.df$effective_voters[match(comuni8101_2006.sf$PRO_COM_T,
                                                 ge2006_comuni_votes.df$PRO_COM_T)]

comuni8101_2006.sf$turnout <-
  comuni8101_2006.sf$effective_voters / 
  comuni8101_2006.sf$registered_voters

ggplot(comuni8101_2006.sf, 
       aes(fill = turnout)) +
  geom_sf(colour = NA) +
  scale_fill_viridis()

hex_italy_eff_voters_2006.df  <- 
  shapeInt2Hex(shape.sf =
                  comuni8101_2006.sf %>%
                 st_transform(32632),
               shape_index =
                 "PRO_COM_T",
               shape_vars =
                 "effective_voters",
               pnt.sf = sez2011_lonlat.sf %>%
                 st_transform(32632),
               pnt_var = "P1")  %>%
  dplyr::group_by(hex) %>%
  dplyr::summarize(effective_voters = sum(effective_voters_frac))

```

### 2008

```{r national-turnout-2008}

comuni8101_2008.sf$registered_voters <- 
  ge2008_comuni_votes.df$registered_voters[match(comuni8101_2008.sf$PRO_COM_T,
                                                 ge2008_comuni_votes.df$PRO_COM_T)]

comuni8101_2008.sf$effective_voters <- 
  ge2008_comuni_votes.df$effective_voters[match(comuni8101_2008.sf$PRO_COM_T,
                                                 ge2008_comuni_votes.df$PRO_COM_T)]

comuni8101_2008.sf$turnout <-
  comuni8101_2008.sf$effective_voters / 
  comuni8101_2008.sf$registered_voters

ggplot(comuni8101_2008.sf, 
       aes(fill = turnout)) +
  geom_sf(colour = NA) +
  scale_fill_viridis()

hex_italy_eff_voters_2008.df  <- 
  shapeInt2Hex(shape.sf =
                  comuni8101_2008.sf %>%
                 st_transform(32632),
               shape_index =
                 "PRO_COM_T",
               shape_vars =
                 "effective_voters",
               pnt.sf = sez2011_lonlat.sf %>%
                 st_transform(32632),
               pnt_var = "P1")  %>%
  dplyr::group_by(hex) %>%
  dplyr::summarize(effective_voters = sum(effective_voters_frac))

```

### 2013

```{r national-turnout-2013}

ge2013_comuni_results$PRO_COM[ge2013_comuni_results$COMUNE == "Vas"] <- 25064
ge2013_comuni_results$PRO_COM[ge2013_comuni_results$COMUNE == "Quero"] <- 25042
ge2013_comuni_results$PRO_COM[ge2013_comuni_results$COMUNE == "Montoro Inferiore"] <- 64061
ge2013_comuni_results$PRO_COM[ge2013_comuni_results$COMUNE == "Montoro Superiore"] <- 64062

comuni8092.sf$M5S_2013_votes <- 
  ge2013_comuni_results$`MOVIMENTO 5 STELLE BEPPEGRILLO.IT`[
    match(as.numeric(as.character(comuni8092.sf$PRO_COM)),
                                                                  ge2013_comuni_results$PRO_COM)]

comuni8092.sf$effective_voters <- 
  ge2013_comuni_results$ppdt_nEffectiveVoters[
    match(as.numeric(as.character(comuni8092.sf$PRO_COM)),
                                                                  ge2013_comuni_results$PRO_COM)]

hex_italy2013.df  <- 
  shapeInt2Hex(shape.sf =
                  comuni8092.sf %>%
                 st_transform(32632),
               shape_index =
                 "PRO_COM_T",
               shape_vars =
                 "M5S_2013_votes",
               pnt.sf = sez2011_lonlat.sf %>%
                 st_transform(32632),
               pnt_var = "P1")  %>%
  dplyr::group_by(hex) %>%
  dplyr::summarize(M5S_2013 = sum(M5S_2013_votes_frac))

hex_italy_eff_voters_2013.df  <- 
  shapeInt2Hex(shape.sf =
                  comuni8092.sf %>%
                 st_transform(32632),
               shape_index =
                 "PRO_COM_T",
               shape_vars =
                 "effective_voters",
               pnt.sf = sez2011_lonlat.sf %>%
                 st_transform(32632),
               pnt_var = "P1")  %>%
  dplyr::group_by(hex) %>%
  dplyr::summarize(effective_voters = sum(effective_voters_frac))

hex_italy_eff_voters_2013.df$effective_voters_2006 <-
  hex_italy_eff_voters_2006.df$effective_voters[match(hex_italy_eff_voters_2013.df$hex,
                                                      hex_italy_eff_voters_2006.df$hex)]

hex_italy_eff_voters_2013.df$effective_voters_2008 <-
  hex_italy_eff_voters_2008.df$effective_voters[match(hex_italy_eff_voters_2013.df$hex,
                                                      hex_italy_eff_voters_2008.df$hex)]

hex_italy_eff_voters_2013.df$P1.sum <-
  hex_pop.df$P1[match(hex_italy_eff_voters_2013.df$hex, 
                      hex_pop.df$hex)]

hex_italy_eff_voters_2013.df <-
  hex_italy_eff_voters_2013.df %>%
  rowwise() %>%
  dplyr::mutate(effective_voters_mean_2006_08 = 
                  mean(c(effective_voters_2006, effective_voters_2008)))

hex_italy_eff_voters_2013.df$turnout <- 
  hex_italy_eff_voters_2013.df$effective_voters / 
  hex_italy_eff_voters_2013.df$P1.sum

hex_italy_eff_voters_2013.df$turnout_2006_08 <- 
  hex_italy_eff_voters_2013.df$effective_voters_mean_2006_08 / 
  hex_italy_eff_voters_2013.df$P1.sum

hex_italy_eff_voters_2013.df$M5S_2013 <-
  hex_italy2013.df$M5S_2013[match(hex_italy_eff_voters_2013.df$hex,
                                  hex_italy2013.df$hex)]

hex_italy_eff_voters_2013.df$M5S_2013_perc <-
  hex_italy_eff_voters_2013.df$M5S_2013 / 
  hex_italy_eff_voters_2013.df$P1.sum

```

### Digital divide

```{r digital-divide}

DCCV_ICT_31072023084525673 <- 
  read.csv("data/istat/DCCV_ICT_31072023084525673.csv")

require(tidyverse)

region_dict <- 
  c("Abruzzo",
    "Basilicata",
    "Calabria",
    "Campania",
    "Emilia-Romagna",
    "Friuli-Venezia Giulia",
    "Lazio",
    "Liguria",
    "Lombardia",
    "Marche",
    "Molise",
    "Piemonte",
    "Puglia",
    "Sardegna",
    "Sicilia",
    "Toscana",
    "Trentino Alto Adige / Südtirol",
    "Umbria",
    "Valle d'Aosta / Vallée d'Aoste",
    "Veneto")

names(region_dict) <- regions.sf$COD_REG[match(region_dict,
                                               regions.sf$DEN_REG)]

names(region_dict)[region_dict == "Friuli-Venezia Giulia"] <- 6
names(region_dict)[region_dict == "Trentino Alto Adige / Südtirol"] <- 4
names(region_dict)[region_dict == "Valle d'Aosta / Vallée d'Aoste"] <- 2

DCCV_ICT_31072023084525673 %>%
  dplyr::filter(Tipo.dato == "famiglie che dispongono di accesso a Internet da casa") %>%
  dplyr::filter(grepl("\\sab.", Territorio)) %>%
  ggplot(aes(x = TIME, y = Value, colour = Territorio)) + 
  geom_line()

pop_cat_by_region <- 
  data.frame(COD_REG = comuni8092.sf$COD_REG,
             POP_2011 = comuni8092.sf$POP_2011) %>%
  dplyr::mutate(Territorio = cut(POP_2011, 
                                 breaks = c(0, 
                                            2000,
                                            10000,
                                            50000,
                                            Inf),
                                 labels = c("fino a 2.000 ab.",
                                            "2.001 - 10.000 ab.",
                                            "10.001 - 50.000 ab.",
                                            "50.001 ab. e più"),
                                 ordered_result = TRUE))

pop_cat_by_region %>%
  # dplyr::group_by(COD_REG) %>%
  dplyr::summarise(w = weighted.mean(as.numeric(Territorio), w = POP_2011))



home_internet_by_pop.df <- 
  DCCV_ICT_31072023084525673 %>%
  dplyr::filter(Tipo.dato == "famiglie che dispongono di accesso a Internet da casa" &
                  grepl("\\sab.", Territorio))  %>%
  dplyr::mutate(Territorio = factor(Territorio, levels = c("fino a 2.000 ab.",
                                                           "2.001 - 10.000 ab.",
                                                           "10.001 - 50.000 ab.",
                                                           "50.001 ab. e più"), 
                                    ordered = T)) %>%
  dplyr::select(TIME, Territorio, Value) %>%
  dplyr::group_by(TIME) %>%
  dplyr::mutate(penalty = Value / Value[Territorio == "10.001 - 50.000 ab."])

home_internet_by_region.df <- 
  DCCV_ICT_31072023084525673 %>%
  dplyr::filter(Tipo.dato == "famiglie che dispongono di accesso a Internet da casa" &
                  Territorio %in% region_dict) %>%
  dplyr::select(TIME, Territorio, Value)

home_internet_by_region.df$COD_REG <- 
  names(region_dict)[match(home_internet_by_region.df$Territorio,
                           region_dict)]

home_internet_by_pop.df %>%
  ggplot(aes(x = TIME, y = penalty, colour = Territorio)) + 
  geom_line()

# ===

res <- 
  st_intersection(italy_hex_cropped.sf %>% st_centroid(), 
                  comuni8092.sf %>% st_make_valid())

hex_comuni8092_digital_divide.df <-
  data.frame(hex = italy_hex_cropped.sf$index,
             stringsAsFactors = F)

hex_comuni8092_digital_divide.df$PRO_COM_T <- 
  res$PRO_COM_T[match(hex_comuni8092_digital_divide.df$hex,
                    res$index)]

res <- 
  st_intersection(italy_hex.sf %>% 
                    dplyr::filter(index %in% hex_comuni8092_digital_divide.df$hex[is.na(hex_comuni8092_digital_divide.df$PRO_COM_T)]),
                  comuni8092.sf %>% st_make_valid()) %>%
  dplyr::mutate(intersection_area = st_area(.))

st_geometry(res) <- NULL

res <- 
  res %>%
  dplyr::group_by(index) %>%
  dplyr::summarise(PRO_COM_T = PRO_COM_T[which.max(intersection_area)])

hex_comuni8092_digital_divide.df$PRO_COM_T[hex_comuni8092_digital_divide.df$hex %in% res$index] <-
  res$PRO_COM_T

hex_comuni8092_digital_divide.df$POP_2011 <- 
  comuni8092.sf$POP_2011[match(hex_comuni8092_digital_divide.df$PRO_COM_T,
                               comuni8092.sf$PRO_COM_T)]   

hex_comuni8092_digital_divide.df$COD_REG <- 
  comuni8092.sf$COD_REG[match(hex_comuni8092_digital_divide.df$PRO_COM_T,
                               comuni8092.sf$PRO_COM_T)]  

hex_comuni8092_digital_divide.df$Territorio <-
  cut(hex_comuni8092_digital_divide.df$POP_2011, 
      breaks = c(0, 
                 2000,
                 10000,
                 50000,
                 Inf),
      labels = c("fino a 2.000 ab.",
                 "2.001 - 10.000 ab.",
                 "10.001 - 50.000 ab.",
                 "50.001 ab. e più"),
      ordered_result = TRUE)

hex_comuni8092_digital_divide.df <- 
  hex_comuni8092_digital_divide.df %>%
  dplyr::left_join(home_internet_by_region.df %>% 
                     dplyr::select(-Territorio) %>%
                     dplyr::mutate(COD_REG = as.numeric(COD_REG)), 
                   by = 'COD_REG')

hex_comuni8092_digital_divide.df <- 
  hex_comuni8092_digital_divide.df %>%
  dplyr::left_join(home_internet_by_pop.df %>% 
                     dplyr::select(-Value), 
                   by = c('TIME', 'Territorio'))

hex_DD.list <- 
  list()

for (i in 2005:2018) {
  hex_DD.list[[as.character(i)]] <- 
    italy_hex.sf %>%
    dplyr::left_join(hex_comuni8092_digital_divide.df %>%
                       dplyr::filter(TIME == i), by = c(index = 'hex')) %>%
    ggplot(aes(fill = Value * penalty)) + 
    geom_sf(colour = NA) +
    scale_fill_distiller(palette = 'Spectral', limits = c(20, 90)) +
    theme_bw() + labs(title = i) + guides(fill = "none")
  
}

require(cowplot)

cowplot::plot_grid(plotlist = hex_DD.list)

```


## Meetup events

* Number of meetup events: `r nrow(meetup_event)`;
* Number of venues: `r nrow(meetup_venue)`;
* Number of meetup groups: `r length(unique(meetup_group$group_id))`;
* Number of meetup groups with at least one event: `r length(unique(meetup_event$group_id))`;
* Number of meetup groups that at least once were found not to be private:

```{r}

meetup_group %>%
  dplyr::group_by(group_id) %>%
  dplyr::summarise(visibility = any(visibility == 'public')) %>%
  dplyr::ungroup() %>%
  dplyr::group_by(public = visibility) %>%
  dplyr::count() %>%
  kable(booktabs = T) %>% 
  kable_styling(latex_options = c("striped"))

```

```{r load-intersect-meetup, eval = T}

meetup_events.df <- 
  meetup_event %>%
  dplyr::mutate(date = as.Date(time)) %>%
  dplyr::select(event_id, group_id, date, venue, yes_rsvp_count) %>%
  dplyr::mutate(month = strftime(date, format = "%Y%m")) %>%
  dplyr::distinct(event_id, .keep_all = T)

meetup_venue.df <- 
  meetup_venue %>%
  dplyr::distinct(venue_id, .keep_all = T) %>%
  dplyr::filter(lon != 0)

meetup_venue_googleapi$lat <-
  meetup_venue_googleapi$lat_google

meetup_venue_googleapi$lon <-
  meetup_venue_googleapi$lon_google

meetup_venue_googleapi$lat_google <- NULL
meetup_venue_googleapi$lon_google <- NULL

meetup_venue.df <- 
  rbind(meetup_venue.df, 
        meetup_venue_googleapi)
  
meetup_venue.sf <- 
  st_as_sf(meetup_venue.df, 
           coords = c("lon","lat"),
           crs = 4326)

res <- 
  st_within(meetup_venue.sf %>% st_transform(32632),
            italy_hex.sf)

meetup_venue.sf$hex <- 
  unlist(lapply(res, function(x) 
  return(ifelse(length(x)==0, NA, x))))
  
meetup_events.df$hex <- 
  meetup_venue.sf$hex[match(meetup_events.df$venue,
                            meetup_venue.sf$venue_id)]

meetup_events.df <- 
  meetup_events.df %>%
  dplyr::group_by(group_id) %>%
  dplyr::arrange(date) %>%
  tidyr::fill(hex, .direction = "downup")

meetup_groups_hex_wide.df <-
  meetup_events.df %>%
  tidyr::pivot_wider(id_cols = 'hex', 
                     names_from = 'month', 
                     values_from = "group_id", 
                     values_fn = list(group_id = ~length(unique(.))),
                     values_fill = list(group_id = 0))

meetup_events_by_week_ge2013.df <-
  meetup_events.df %>%
  dplyr::filter(!is.na(hex) &
                  date >= as.Date("2013-02-24") - 30 * 6 &
                  date < as.Date("2013-02-24")) 

meetup_events_by_week_ge2013.df$hex <- 
  factor(meetup_events_by_week_ge2013.df$hex,
         levels = italy_hex.sf$index)

meetup_events_by_week_ge2013.df$week <- 
  as.character(as.integer(format(meetup_events_by_week_ge2013.df$date, "%W")))

meetup_events_by_week_ge2013.df$week <- 
  as.integer(format(meetup_events_by_week_ge2013.df$date, "%W"))
meetup_events_by_week_ge2013.df$week <- 
  factor(meetup_events_by_week_ge2013.df$week,
         levels = 0:53)

meetup_groups_by_week_ge2013.df <- 
  meetup_events_by_week_ge2013.df %>%
  dplyr::group_by(hex, week, .drop = FALSE) %>%
  dplyr::summarize(groups = length(unique(group_id)))

meetup_events_by_week_ge2018.df <-
  meetup_events.df %>%
  dplyr::filter(!is.na(hex) &
                  date >= as.Date("2018-03-04") - 30 * 6 &
                  date < as.Date("2018-03-04")) 

meetup_events_by_week_ge2018.df$hex <- 
  factor(meetup_events_by_week_ge2018.df$hex,
         levels = italy_hex.sf$index)

meetup_events_by_week_ge2018.df$week <- 
  as.character(as.integer(format(meetup_events_by_week_ge2018.df$date, "%W")))

meetup_events_by_week_ge2018.df$week <- 
  as.integer(format(meetup_events_by_week_ge2018.df$date, "%W"))
meetup_events_by_week_ge2018.df$week <- 
  factor(meetup_events_by_week_ge2018.df$week,
         levels = 0:53)

meetup_groups_by_week_ge2018.df <- 
  meetup_events_by_week_ge2018.df %>%
  dplyr::group_by(hex, week, .drop = FALSE) %>%
  dplyr::summarize(groups = length(unique(group_id)))

save(meetup_venue.sf,
     meetup_events.df,
     meetup_groups_hex_wide.df,
     meetup_groups_by_week_ge2013.df,
     meetup_groups_by_week_ge2018.df, 
     file = "data/out/load-intersect-meetup.RData")

save(meetup_groups_hex_wide.df,
     meetup_groups_by_week_ge2013.df,
     meetup_groups_by_week_ge2018.df, 
     file = "data/out/load-intersect-meetup-deidentified.RData")

```


```{r load-plot-hex-meetup, fig.cap = "Meetup groups in April 2013", fig.width = 12}

load("data/out/load-intersect-meetup.RData")

italy_hex_cropped.sf$groups <- 
  meetup_groups_hex_wide.df$`201304`[match(italy_hex_cropped.sf$index,
                                           meetup_groups_hex_wide.df$hex)]

italy_hex_cropped.sf$groups[is.na(italy_hex_cropped.sf$groups)] <- 0

ggplot(italy_hex_cropped.sf, aes(fill = sqrt(groups))) +
  geom_sf(colour = NA) + scale_fill_distiller(palette = "YlGnBu", 
                                              direction = 1) +
  labs(fill = "n. groups (sqrt.)")

```

* Events not associated with a geolocated venue: `r sum(is.na(meetup_events.df$venue))`
* Venues without geographic coordinates: `r sum(meetup_venue$lat == 0)``
* Events associated to a hexagonal cell: `r sum(!is.na(meetup_events.df$hex))`

# Meetup analysis

```{r mcp-fit, eval = T}

model = list(
  sum_yes_rsvp_count ~ 1 + week_num,
   ~ 1 + week_num,  # linear segment
  ~ 1 + week_num # linear segment
)

week_seq <- 
  seq(min(meetup_events.df$date),
      max(meetup_events.df$date),
      by = "week")

meetup_rsvp_count_week.df <- 
  meetup_events.df %>%
  dplyr::filter(!is.na(hex)) %>%
  dplyr::group_by(week = cut(date, breaks = week_seq),
                  week_num = as.numeric(week)) %>% 
  dplyr::summarise(sum_yes_rsvp_count = sum(yes_rsvp_count))

set.seed(28100)

fit <- mcp(model,
           meetup_rsvp_count_week.df %>%
             dplyr::filter(as.Date(week) <= as.Date("2012-12-31")))

save(fit, file = "data/out/mcp-fit.RData")

```

```{r change-point}

load("data/out/mcp-fit.RData")

cp_1 <- 
  mean(c(fit$mcmc_post[[1]][,1],
         fit$mcmc_post[[2]][,1],
         fit$mcmc_post[[3]][,1]))

cp_2 <- 
  mean(c(fit$mcmc_post[[1]][,2],
         fit$mcmc_post[[2]][,2],
         fit$mcmc_post[[3]][,2]))

week_seq <- 
  seq(min(meetup_events.df$date),
      max(meetup_events.df$date),
      by = "week")

meetup_rsvp_count_date.df <- 
  meetup_events.df %>%
  dplyr::filter(!is.na(hex)) %>%
  dplyr::group_by(date) %>% 
  dplyr::summarise(sum_yes_rsvp_count = sum(yes_rsvp_count))

meetup_rsvp_count_week.df <- 
  meetup_events.df %>%
  dplyr::filter(!is.na(hex)) %>%
  dplyr::group_by(week = cut(date, breaks = week_seq),
                  week_num = as.numeric(week)) %>% 
  dplyr::summarise(sum_yes_rsvp_count = sum(yes_rsvp_count))

meetup_rsvp_count_hex_week.df <- 
  meetup_events.df %>%
  dplyr::filter(!is.na(hex)) %>%
  dplyr::group_by(week = cut(date, breaks = week_seq),
                  hex) %>% 
  dplyr::summarise(sum_yes_rsvp_count = sum(yes_rsvp_count)) %>%
  dplyr::ungroup()

meetup_rsvp_count_date.df %>%
  ggplot(aes(x = date, y = sum_yes_rsvp_count)) +
  geom_histogram(stat ='identity') +
  geom_vline(xintercept = week_seq[c(round(cp_1), round(cp_2))]) + 
  scale_x_date(limits = c(as.Date("2010-01-01"), as.Date("2012-12-31"))) +
  geom_smooth()
  
meetup_rsvp_count_week.df %>%
  dplyr::filter(as.Date(week) <= as.Date("2013-01-01")) %>%
  ggplot(aes(x = as.Date(week), y = sum_yes_rsvp_count)) +
  geom_histogram(stat ='identity') +
  geom_smooth() +
  geom_vline(xintercept = week_seq[c(round(cp_1), round(cp_2))])

break_dates <- 
  c(as.Date(week_seq[round(cp_1)]), 
    as.Date(week_seq[round(cp_2)+1]),
    as.Date("2013-02-24"))

```

Change point dates: `r break_dates`

# Meetup spatial analysis

```{r meetup-spatial-analysis-prepare}

vars_1.df <-
  data.frame(hex = italy_hex.sf$index)

meetup_rsvp_2007_2012.df <- 
  meetup_events.df %>%
  dplyr::filter(!is.na(hex)) %>%
  dplyr::filter(date >= break_dates[1] & date < break_dates[2]) %>%
  dplyr::group_by(hex) %>%
  dplyr::summarize(yes_rsvp_count = sum(yes_rsvp_count))

meetup_rsvp_2012_2013.df <- 
  meetup_events.df %>%
  dplyr::filter(!is.na(hex)) %>%
  dplyr::filter(date >= break_dates[2] &
                  date < break_dates[3]) %>%
  dplyr::group_by(hex) %>%
  dplyr::summarize(yes_rsvp_count = sum(yes_rsvp_count))

vars_1.df$rsvp_sum_2007_2012 <- 
  meetup_rsvp_2007_2012.df$yes_rsvp_count[match(vars_1.df$hex,
                                                meetup_rsvp_2007_2012.df$hex)]

vars_1.df$rsvp_sum_2012_2013 <- 
  meetup_rsvp_2012_2013.df$yes_rsvp_count[match(vars_1.df$hex,
                                                meetup_rsvp_2012_2013.df$hex)]

vars_1.df$rsvp_sum_2007_2012[is.na(vars_1.df$rsvp_sum_2007_2012)] <- 0

vars_1.df$rsvp_sum_2012_2013[is.na(vars_1.df$rsvp_sum_2012_2013)] <- 0

# Turnout

vars_1.df$turnout_2006_08 <- 
  hex_italy_eff_voters_2013.df$turnout_2006_08[
    match(vars_1.df$hex,
          hex_italy_eff_voters_2013.df$hex)]

# Votes

vars_1.df$M5S_2013_perc <- 
  hex_italy_eff_voters_2013.df$M5S_2013_perc[
    match(vars_1.df$hex,
          hex_italy_eff_voters_2013.df$hex)]

# Census

load("data/out/hex_ind.df.RData")

vars_1.df <- 
  vars_1.df %>%
  dplyr::left_join(hex_census.df %>%
                     dplyr::select(hex, P1, NP_949, P47, P62, P130, P27, P28, P29),
                   by = c(hex = "hex"))

vars_1.df$over65 <- 
  vars_1.df$P27 + vars_1.df$P28 + vars_1.df$P29

vars_1.df$over65[is.na(vars_1.df$over65)] <- 0

vars_1.df$over65_perc <-
  vars_1.df$over65 / 
  vars_1.df$P1

vars_1.df$NP_949[is.na(vars_1.df$NP_949)] <- 0

vars_1.df$NP_949_perc <-
  vars_1.df$NP_949  / 
  vars_1.df$P1

vars_1.df$P47[is.na(vars_1.df$P47)] <- 0

vars_1.df$P47_perc <-
  vars_1.df$P47 / 
  vars_1.df$P1

vars_1.df$P62[is.na(vars_1.df$P62)] <- 0

vars_1.df$P62_perc <-
  vars_1.df$P62 / 
  vars_1.df$P1

vars_1.df$P130[is.na(vars_1.df$P130)] <- 0

vars_1.df$P130_perc <-
  vars_1.df$P130 / 
  vars_1.df$P1

vars_1.df$rsvp_sum_2007_2012_perc <- 
  vars_1.df$rsvp_sum_2007_2012 / vars_1.df$P1

vars_1.df$rsvp_sum_2012_2013_perc <- 
  vars_1.df$rsvp_sum_2012_2013 / vars_1.df$P1


vars_1.df$P1_cut_6 <-
  hex_P1_rank.df$P1_cut_6[match(vars_1.df$hex,
                                hex_P1_rank.df$hex)]

hex.nb <-
  poly2nb(italy_hex.sf, row.names=italy_hex.sf$index, queen=FALSE)

hex.m <-
  nb2mat(hex.nb, style='B', zero.policy = TRUE)

colnames(hex.m) <- italy_hex.sf$index
rownames(hex.m) <- italy_hex.sf$index

hex.m <- 
  hex.m[rowSums(hex.m) > 0, rowSums(hex.m) > 0]

# Prepare

vars_1.df <- 
  vars_1.df %>%
  dplyr::filter(hex %in% colnames(hex.m)) %>%
  dplyr::filter(!is.na(P1) & P1_cut_6 != "1") %>%
  dplyr::mutate(P1_cut_6 = factor(as.character(P1_cut_6), ordered = T))

hex.m <- 
  hex.m[rownames(hex.m) %in% vars_1.df$hex,
        colnames(hex.m) %in% vars_1.df$hex]

hex.m <- 
  hex.m[rowSums(hex.m) > 0, rowSums(hex.m) > 0]

vars_1.df <- 
  vars_1.df %>%
  dplyr::filter(hex %in% colnames(hex.m))

vars_1.df$rsvp_sum_diff  <- 
  sqrt(vars_1.df$rsvp_sum_2012_2013) - 
  sqrt(vars_1.df$rsvp_sum_2007_2012)

vars_1.df$rsvp_perc_diff  <- 
  sqrt(vars_1.df$rsvp_sum_2012_2013_perc) - 
  sqrt(vars_1.df$rsvp_sum_2007_2012_perc)

vars_1.df <- 
  vars_1.df %>%
  dplyr::select(hex, 
                rsvp_sum_2007_2012_perc,
                rsvp_sum_2012_2013_perc,
                rsvp_sum_diff,
                rsvp_perc_diff,
                M5S_2013_perc,
                turnout_2006_08,
                NP_949_perc,
                P47_perc,
                P62_perc,
                P130_perc,
                over65_perc,
                P1_cut_6)

## Digital divide

hex_digital_divide_2007_2012.df <- 
  hex_comuni8092_digital_divide.df %>%
  dplyr::filter(TIME >= 2007 &
                  TIME < 2013) %>%
  dplyr::group_by(hex) %>%
  dplyr::summarise(dd_2007_2012 = mean(Value * penalty))

hex_digital_divide_2013.df <- 
  hex_comuni8092_digital_divide.df %>%
  dplyr::filter(TIME == 2013) %>%
  dplyr::group_by(hex) %>%
  dplyr::summarise(dd_2013 = mean(Value * penalty))

vars_1.df$dd_2007_2012_perc <- 
  hex_digital_divide_2007_2012.df$dd_2007_2012[match(vars_1.df$hex,
                                                     hex_digital_divide_2007_2012.df$hex)] / 
  100

vars_1.df$dd_2013_perc <- 
  hex_digital_divide_2013.df$dd_2013[match(vars_1.df$hex,
                                           hex_digital_divide_2013.df$hex)] /
  100
  

```

```{r meetup-spatial-analysis-model-1} 

# Model

## 2007-2012

formula_1 = 
  sqrt(rsvp_sum_2007_2012_perc) ~ 
  turnout_2006_08 + sqrt(NP_949_perc) + dd_2007_2012_perc +
  P1_cut_6 

var_1_MCMC.chain_1 <- S.CARleroux(
  formula = formula_1,
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_1_MCMC.chain_2 <- S.CARleroux(
  formula =  formula_1, 
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_1_MCMC.chain_3 <- S.CARleroux(
  formula = formula_1, 
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

beta_var_1.samples_2007_2012 <- 
  mcmc.list(var_1_MCMC.chain_1$samples$beta, 
            var_1_MCMC.chain_2$samples$beta,
            var_1_MCMC.chain_3$samples$beta)

beta_var_1.samples_2007_2012.m <- 
  rbind(var_1_MCMC.chain_1$samples$beta, 
        var_1_MCMC.chain_2$samples$beta,
        var_1_MCMC.chain_3$samples$beta)

colnames(beta_var_1.samples_2007_2012.m) <- 
  colnames(var_1_MCMC.chain_1$X)

formula_2 = 
  sqrt(rsvp_sum_2007_2012_perc) ~ 
  dd_2007_2012_perc +
  P47_perc + P62_perc + P130_perc + over65_perc + 
  P1_cut_6 

var_2_MCMC.chain_1 <- S.CARleroux(
  formula = formula_2,
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_2_MCMC.chain_2 <- S.CARleroux(
  formula = formula_2,
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_2_MCMC.chain_3 <- S.CARleroux(
  formula = formula_2,
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

beta_var_2.samples_2007_2012 <- 
  mcmc.list(var_2_MCMC.chain_1$samples$beta, 
            var_2_MCMC.chain_2$samples$beta,
            var_2_MCMC.chain_3$samples$beta)

beta_var_2.samples_2007_2012.m <- 
  rbind(var_2_MCMC.chain_1$samples$beta, 
        var_2_MCMC.chain_2$samples$beta,
        var_2_MCMC.chain_3$samples$beta)

colnames(beta_var_2.samples_2007_2012.m) <- 
  colnames(var_2_MCMC.chain_1$X)

```

```{r meetup-spatial-analysis-model-2}

## 2012-2013

formula_1 = 
  sqrt(rsvp_sum_2012_2013_perc) ~ 
  turnout_2006_08 + sqrt(NP_949_perc) + dd_2013_perc +
  P1_cut_6 

var_1_MCMC.chain_1 <- S.CARleroux(
  formula = formula_1,
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_1_MCMC.chain_2 <- S.CARleroux(
  formula =  formula_1, 
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_1_MCMC.chain_3 <- S.CARleroux(
  formula = formula_1, 
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

beta_var_1.samples_2012_2013 <- 
  mcmc.list(var_1_MCMC.chain_1$samples$beta, 
            var_1_MCMC.chain_2$samples$beta,
            var_1_MCMC.chain_3$samples$beta)

beta_var_1.samples_2012_2013.m <- 
  rbind(var_1_MCMC.chain_1$samples$beta, 
        var_1_MCMC.chain_2$samples$beta,
        var_1_MCMC.chain_3$samples$beta)

colnames(beta_var_1.samples_2012_2013.m) <- 
  colnames(var_1_MCMC.chain_1$X)

formula_2 = 
  sqrt(rsvp_sum_2012_2013_perc) ~ 
  dd_2013_perc +
  P47_perc + P62_perc + P130_perc + over65_perc + 
  P1_cut_6 

var_2_MCMC.chain_1 <- S.CARleroux(
  formula = formula_2,
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_2_MCMC.chain_2 <- S.CARleroux(
  formula =  formula_2, 
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_2_MCMC.chain_3 <- S.CARleroux(
  formula = formula_2, 
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

beta_var_2.samples_2012_2013 <- 
  mcmc.list(var_2_MCMC.chain_1$samples$beta, 
            var_2_MCMC.chain_2$samples$beta,
            var_2_MCMC.chain_3$samples$beta)

beta_var_2.samples_2012_2013.m <- 
  rbind(var_2_MCMC.chain_1$samples$beta, 
        var_2_MCMC.chain_2$samples$beta,
        var_2_MCMC.chain_3$samples$beta)

colnames(beta_var_2.samples_2012_2013.m) <- 
  colnames(var_2_MCMC.chain_1$X)

```

```{r meetup-spatial-analysis-model-5}

## RSVP diff

formula_1 = 
  rsvp_perc_diff ~ 
  turnout_2006_08 + sqrt(NP_949_perc) + dd_2013_perc +
  P1_cut_6 

var_6_MCMC.chain_1 <- S.CARleroux(
  formula = formula_1,
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_6_MCMC.chain_2 <- S.CARleroux(
  formula =  formula_1, 
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_6_MCMC.chain_3 <- S.CARleroux(
  formula = formula_1, 
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

beta_var_6.samples_diff <- 
  mcmc.list(var_6_MCMC.chain_1$samples$beta, 
            var_6_MCMC.chain_2$samples$beta,
            var_6_MCMC.chain_3$samples$beta)

beta_var_6.samples_diff.m <- 
  rbind(var_6_MCMC.chain_1$samples$beta, 
        var_6_MCMC.chain_2$samples$beta,
        var_6_MCMC.chain_3$samples$beta)

colnames(beta_var_6.samples_diff.m) <- 
  colnames(var_6_MCMC.chain_1$X)

formula_2 = 
  rsvp_perc_diff ~ 
  dd_2013_perc +
  P47_perc + P62_perc + P130_perc + over65_perc + 
  P1_cut_6 

var_7_MCMC.chain_1 <- S.CARleroux(
  formula = formula_2,
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_7_MCMC.chain_2 <- S.CARleroux(
  formula =  formula_2, 
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_7_MCMC.chain_3 <- S.CARleroux(
  formula = formula_2, 
  data = vars_1.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

beta_var_7.samples_diff <- 
  mcmc.list(var_7_MCMC.chain_1$samples$beta, 
            var_7_MCMC.chain_2$samples$beta,
            var_7_MCMC.chain_3$samples$beta)

beta_var_7.samples_diff.m <- 
  rbind(var_7_MCMC.chain_1$samples$beta, 
        var_7_MCMC.chain_2$samples$beta,
        var_7_MCMC.chain_3$samples$beta)

colnames(beta_var_7.samples_diff.m) <- 
  colnames(var_7_MCMC.chain_1$X)

```

```{r meetup-spatial-analysis-model-3}

vars_1_scaled.df <- 
  vars_1.df %>%
  dplyr::mutate_at(.vars = c('rsvp_sum_2007_2012_perc',
                             'rsvp_sum_2012_2013_perc',
                             'rsvp_sum_diff',
                             'P47_perc', 
                             'P47_perc', 'P62_perc',
                             'P130_perc', 'over65_perc',
                             'dd_2013_perc'),
                   scale)

# Predict electoral results

## 2007-12

var_3_MCMC.chain_1 <- S.CARleroux(
  formula =   M5S_2013_perc ~
    rsvp_sum_2007_2012_perc + dd_2013_perc +
    P47_perc + P62_perc + P130_perc + over65_perc + 
    P1_cut_6,
  data = vars_1_scaled.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_3_MCMC.chain_2 <- S.CARleroux(
  formula =   M5S_2013_perc ~
    rsvp_sum_2007_2012_perc + dd_2013_perc +
    P47_perc + P62_perc + P130_perc + over65_perc + 
    P1_cut_6 ,
  data = vars_1_scaled.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_3_MCMC.chain_3 <- S.CARleroux(
  formula =   M5S_2013_perc ~
    rsvp_sum_2007_2012_perc + dd_2013_perc +
    P47_perc + P62_perc + P130_perc + over65_perc + 
    P1_cut_6 ,
  data = vars_1_scaled.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

beta_var_3.samples <- 
  mcmc.list(var_3_MCMC.chain_1$samples$beta, 
            var_3_MCMC.chain_2$samples$beta,
            var_3_MCMC.chain_3$samples$beta)

beta_var_3.samples.m <- 
  rbind(var_3_MCMC.chain_1$samples$beta, 
        var_3_MCMC.chain_2$samples$beta,
        var_3_MCMC.chain_3$samples$beta)

colnames(beta_var_3.samples.m) <- 
  colnames(var_3_MCMC.chain_1$X)

## 2012-13

var_4_MCMC.chain_1 <- S.CARleroux(
  formula =   M5S_2013_perc ~
    rsvp_sum_2012_2013_perc + dd_2013_perc +
    P47_perc + P62_perc + P130_perc + over65_perc + 
    P1_cut_6,
  data = vars_1_scaled.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_4_MCMC.chain_2 <- S.CARleroux(
  formula =   M5S_2013_perc ~
    rsvp_sum_2012_2013_perc + dd_2013_perc +
    P47_perc + P62_perc + P130_perc + over65_perc + 
    P1_cut_6 ,
  data = vars_1_scaled.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_4_MCMC.chain_3 <- S.CARleroux(
  formula =   M5S_2013_perc ~
    rsvp_sum_2012_2013_perc + dd_2013_perc +
    P47_perc + P62_perc + P130_perc + over65_perc + 
    P1_cut_6 ,
  data = vars_1_scaled.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

beta_var_4.samples <- 
  mcmc.list(var_4_MCMC.chain_1$samples$beta, 
            var_4_MCMC.chain_2$samples$beta,
            var_4_MCMC.chain_3$samples$beta)

beta_var_4.samples.m <- 
  rbind(var_4_MCMC.chain_1$samples$beta, 
        var_4_MCMC.chain_2$samples$beta,
        var_4_MCMC.chain_3$samples$beta)

colnames(beta_var_4.samples.m) <- 
  colnames(var_4_MCMC.chain_1$X)

## Diff

var_5_MCMC.chain_1 <- S.CARleroux(
  formula =   M5S_2013_perc ~
    rsvp_sum_diff + dd_2013_perc +
    P47_perc + P62_perc + P130_perc + over65_perc + 
    P1_cut_6,
  data = vars_1_scaled.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_5_MCMC.chain_2 <- S.CARleroux(
  formula =   M5S_2013_perc ~
    rsvp_sum_diff + dd_2013_perc +
    P47_perc + P62_perc + P130_perc + over65_perc + 
    P1_cut_6 ,
  data = vars_1_scaled.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE,
   n.cores=n_cores
)

var_5_MCMC.chain_3 <- S.CARleroux(
  formula =   M5S_2013_perc ~
    rsvp_sum_diff + dd_2013_perc +
    P47_perc + P62_perc + P130_perc + over65_perc + 
    P1_cut_6 ,
  data = vars_1_scaled.df,
  family = "gaussian",
  W = hex.m,
  burnin=100000, n.sample=300000, thin=100,
  verbose = TRUE
)

beta_var_5.samples <- 
  mcmc.list(var_5_MCMC.chain_1$samples$beta, 
            var_5_MCMC.chain_2$samples$beta,
            var_5_MCMC.chain_3$samples$beta)

beta_var_5.samples.m <- 
  rbind(var_5_MCMC.chain_1$samples$beta, 
        var_5_MCMC.chain_2$samples$beta,
        var_5_MCMC.chain_3$samples$beta)

colnames(beta_var_5.samples.m) <- 
  colnames(var_5_MCMC.chain_1$X)

```

```{r meetup-spatial-analysis-model-4, eval = T}

# Rolling

rolling_posts.df <-
  data.frame()

for (i in 1:length(week_seq)) {
  
  print(i)
  
  this_vars.df <- 
    vars_1.df %>%
    dplyr::left_join(meetup_rsvp_count_hex_week.df %>%
                       dplyr::filter(as.character(week) == week_seq[i]) %>%
                       dplyr::select(-week),
                     by = "hex")
  
  this_vars.df <- 
    this_vars.df %>%
    dplyr::left_join(hex_comuni8092_digital_divide.df %>%
                       dplyr::filter(TIME == format(week_seq[i], "%Y")) %>%
                       dplyr::mutate(dd_perc = (Value * penalty) / 100) %>%
                       dplyr::select(hex, dd_perc),
                     by = "hex")
  
  this_vars.df$sum_yes_rsvp_count[is.na(this_vars.df$sum_yes_rsvp_count)] <- 0 
  
  try({
    
    this_MCMC <- S.CARleroux(
      formula =     
        sqrt(sum_yes_rsvp_count) ~ 
        dd_perc + 
        turnout_2006_08 + sqrt(NP_949_perc) + P1_cut_6,
      data = this_vars.df,
      family = "gaussian",
      W = hex.m,
      burnin=100000, n.sample=300000, thin=100,
      verbose = TRUE,
   n.cores=n_cores
    )
    
    this_betas.df <- 
      this_MCMC$samples$beta
    
    colnames(this_betas.df) <- 
      colnames(this_MCMC$X)
    
    rolling_posts.df <- 
      rolling_posts.df %>%
      dplyr::bind_rows(
        round(t(rbind(apply(this_betas.df, 2, mean), 
                      apply(this_betas.df, 2, quantile, 
                            c(0.025, 0.975)))), 5) %>%
          as.data.frame() %>%
          dplyr::mutate(week =  week_seq[i]))
    
  })
  
}

save(rolling_posts.df, file = "data/out/meetup_NP_949_rolling_posts.df.RData")

```


# Individual level

```{r itanes-individual-level}

sf_use_s2(FALSE) # see https://github.com/r-spatial/sf/issues/1762

itanes_2013_vars <-
  itanes_2013 %>%
  dplyr::mutate(
    
    age = d1, 
    
    sex = 
      as_factor(sex),
    
     metro = comune %in%
      c("MILANO","ROMA","NAPOLI"),
    
    pol_part = 
      rescale((d21_1 == 1) + 
                (d21_2 == 1) +
                (d21_3 == 1) +
                (d21_4 == 1) +
                (d21_5 == 1) +
                (d21_6 == 1) +
                (d21_7 == 1) +
                (d21_8 == 1),
              to = c(1,10)),
    
    pol_interest = 
      rescale(toNumeric(d27, 5:6), to = c(10,1)),

    social_capital_bin = 
      d78_1 == 1 | 
      d78_5 == 1 |
      d78_6 == 1 |
      d78_8 == 1,
    
    nat_pol_trust = 
      rescale(toNumeric(d15_1, 5:6) +
                toNumeric(d15_2, 5:6),
              to = c(1,10)),
    
    discontent = 
      rescale(toNumeric(d15_1, 5:6) +
                toNumeric(d15_2, 5:6),
              to = c(10,1)),
    
    discontent_type = 
                  factor(case_when(nat_pol_trust < 
                                     median(nat_pol_trust, na.rm = T) &
                                     social_capital_bin ~ "discontent with social capital",
                                   nat_pol_trust < 
                                     median(nat_pol_trust, na.rm = T) &
                                     !social_capital_bin ~ "discontent without social capital",
                                   nat_pol_trust >= 
                                     median(nat_pol_trust, na.rm = T) 
                                   ~ "not discontent",
                                   TRUE ~ "NA"),
                         levels = rev(c('discontent without social capital', 
                                        "NA", 'not discontent', 
                                        'discontent with social capital'))),
    
    pol_efficacy_int =
      rescale(toNumeric(d28, 6:7), to = c(1,10)),
    
    NOSAY =
      rescale(toNumeric(d38_1, 5:6),
          to = c(10,0)),
    
    COMPLEXITY =
      rescale(toNumeric(d38_2, 5:6),
          to = c(10,0)),
    
    DISTANCE =
      rescale(toNumeric(d38_3, 5:6),
          to = c(10,0)),
    
    NOCARE =
      rescale(toNumeric(d38_4, 5:6),
          to = c(10,0)),
    
    pol_efficacy_ext =
      NOSAY +
      COMPLEXITY + 
      DISTANCE +
      NOCARE,
    
    PRO_COM_T = 
      sprintf("%06d", istat),
    
    voted_M5S_2013 = 
      d90==5,
    
    no_vote = 
      d86 == 1,
    
    education = 
      toNumeric(stu, 99),

    municipality_size = 
      toNumeric(ampiezza)
    
  ) %>%
  dplyr::mutate_at(.vars = c('nat_pol_trust', 'discontent',
                             'pol_interest', 
                             'pol_efficacy_int', 'pol_efficacy_ext',
                             'education', 'age',
                             "municipality_size", "pol_part",
                             'NOSAY', 'NOCARE', 'COMPLEXITY', 'DISTANCE'),
                   scale)

# TYPO in SURVEY 

itanes_2013_vars$PRO_COM_T[
  itanes_2013_vars$comune == "GRUMO NEVANO" &
  itanes_2013_vars$PRO_COM_T == "063049"] <- '063036'

itanes_2013.sf <- 
  comuni8092.sf %>%
  dplyr::filter(PRO_COM_T %in% unique(itanes_2013_vars$PRO_COM_T))

res <-
  st_within(sez2011_lonlat.sf,
            itanes_2013.sf %>%
              st_transform(4326))

sez2011_lonlat.sf$PRO_COM_T_8092 <- 
  itanes_2013.sf$PRO_COM_T[
    unlist(lapply(res,
                  FUN = function(x) return(ifelse(length(x) == 0, NA, x))))]

sez2011_lonlat_itanes2013.sf <-
  sez2011_lonlat.sf %>%
  dplyr::filter(!is.na(PRO_COM_T_8092))

sez2011_lonlat_itanes2013.sf <- 
  merge(sez2011_lonlat_itanes2013.sf,
        sez2011_pop.df %>%
          dplyr::select(sez2011, P17:P29, P33:P45, P47, P54, P130),
        by = 'sez2011')

sez2011_lonlat_itanes2013.df <- 
  sez2011_lonlat_itanes2013.sf

st_geometry(sez2011_lonlat_itanes2013.df) <- NULL

sez2011_lonlat_itanes2013.df$lon <- 
  st_coordinates(sez2011_lonlat_itanes2013.sf)[,1]
sez2011_lonlat_itanes2013.df$lat <- 
  st_coordinates(sez2011_lonlat_itanes2013.sf)[,2]

sez2011_lonlat_itanes2013.df$NP_949 <-
  sez2011_ind_wide.df$NP_949[match(sez2011_lonlat_itanes2013.df$sez2011,
                                   sez2011_ind_wide.df$SEZ2011)]

NP_949_PRO_COM_T_8092.sum <-
  sez2011_lonlat_itanes2013.df %>%
  dplyr::group_by(PRO_COM_T_8092) %>%
  dplyr::summarise(NP_949 = sum(NP_949,na.rm = T))

itanes_2013_vars$PRO_COM_T_pop_2011 <-
  itanes_2013.sf$POP_2011[match(itanes_2013_vars$PRO_COM_T,
                                itanes_2013.sf$PRO_COM_T)]

res <-
  sf::st_intersects(st_make_valid(itanes_2013.sf),
                    italy_hex.sf)

PRO_COM_T_rsvp.df <- 
  data.frame(PRO_COM_T = itanes_2013.sf$PRO_COM_T,
             Shape_Area = itanes_2013.sf$Shape_Area,
             rsvp_sum_2007_2012 = NA,
             rsvp_sum_2012_2013 = NA)

for (i in 1:nrow(PRO_COM_T_rsvp.df)) {
  
  PRO_COM_T_rsvp.df$rsvp_sum_2007_2012[i] <- 
    sum(vars_1.df$rsvp_sum_2007_2012[vars_1.df$hex %in% 
                                       italy_hex.sf$index[res[[i]]]])
  
  PRO_COM_T_rsvp.df$rsvp_sum_2012_2013[i] <- 
    sum(vars_1.df$rsvp_sum_2012_2013[vars_1.df$hex %in% 
                                   italy_hex.sf$index[res[[i]]]])
  
}

PRO_COM_T_rsvp.df$POP_2011 <- 
  comuni8092.sf$POP_2011[match(PRO_COM_T_rsvp.df$PRO_COM_T,
                               comuni8092.sf$PRO_COM_T)]

PRO_COM_T_rsvp.df$rsvp_sum_2007_2012_perc <- 
  PRO_COM_T_rsvp.df$rsvp_sum_2007_2012 / PRO_COM_T_rsvp.df$POP_2011

PRO_COM_T_rsvp.df$rsvp_sum_2012_2013_perc <- 
  PRO_COM_T_rsvp.df$rsvp_sum_2012_2013 / PRO_COM_T_rsvp.df$POP_2011

itanes_2013_vars$rsvp_sum_2007_2012_perc <- 
  PRO_COM_T_rsvp.df$rsvp_sum_2007_2012_perc[match(itanes_2013_vars$PRO_COM_T,
                                                  PRO_COM_T_rsvp.df$PRO_COM_T)]

itanes_2013_vars$rsvp_sum_2012_2013_perc <- 
  PRO_COM_T_rsvp.df$rsvp_sum_2012_2013_perc[match(itanes_2013_vars$PRO_COM_T,
                                                  PRO_COM_T_rsvp.df$PRO_COM_T)]

itanes_2013_vars$rsvp_sum_diff <- 
  scale(sqrt(itanes_2013_vars$rsvp_sum_2012_2013_perc) -
          sqrt(itanes_2013_vars$rsvp_sum_2007_2012_perc))


voted_m5s_2013.glm_1 <- 
  
  glm(voted_M5S_2013 ~ 
        
        age +
        
        sex +
        
        education + 
        
        pol_efficacy_int +
        
        discontent + 
        
        NOSAY + NOCARE + COMPLEXITY + DISTANCE + 
        
        pol_interest + 
        
        social_capital_bin + 
        
        pol_part + 
        
        municipality_size,
      
      data = itanes_2013_vars,
      
      family = 'binomial')

voted_m5s_2013.glm_1.df <- 
  confint(voted_m5s_2013.glm_1) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("var") %>%
  dplyr::mutate(model = "voted_m5s_2013.glm_1") %>%
  dplyr::left_join(voted_m5s_2013.glm_1$coefficients %>% 
                     as.data.frame(col.names = "est") %>%
                     tibble::rownames_to_column("var") %>%
                     dplyr::rename(est = '.'),
                   by = 'var')

discontent.lm_1 <- 
  
  lm(discontent ~ 
       
       age +
       
       sex +
       
       education + 
      
       pol_efficacy_ext + 
       
       pol_interest + 
      
       social_capital_bin +
       
       pol_part + 
       
       municipality_size,
       
    data = itanes_2013_vars)

discontent.lm_1.df <- 
  confint(discontent.lm_1) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("var") %>%
  dplyr::mutate(model = "discontent.lm_1") %>%
  dplyr::left_join(discontent.lm_1$coefficients %>% 
                     as.data.frame(col.names = "est") %>%
                     tibble::rownames_to_column("var") %>%
                     dplyr::rename(est = '.'),
                   by = 'var')

discontent.lm_2 <- 
  
  lm(discontent ~ 
       
       age +
       
       sex +
       
       education + 
       
       pol_interest + 
       
       social_capital_bin + 
       
       pol_part + 
       
       NOSAY + NOCARE + COMPLEXITY + DISTANCE + 
       
       municipality_size,
     
     data = itanes_2013_vars)

discontent.lm_2.df <- 
  confint(discontent.lm_2) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("var") %>%
  dplyr::mutate(model = "discontent.lm_2") %>%
  dplyr::left_join(discontent.lm_2$coefficients %>% 
                     as.data.frame(col.names = "est") %>%
                     tibble::rownames_to_column("var") %>%
                     dplyr::rename(est = '.'),
                   by = 'var')

test.lmer <- 
  lme4::lmer(formula = pol_efficacy_int ~ 
               
               1 + (1 | PRO_COM_T),
             
             data = itanes_2013_vars[!itanes_2013_vars$metro,])

pol_efficacy_int_1.lmer <- 
  
  lme4::lmer(formula = pol_efficacy_int   ~ 
            
               age +
               
               sex +
               
               education + 

               rsvp_sum_diff + 
               
               pol_interest + 
               
               social_capital_bin + 
               
               pol_part +
               
               municipality_size + 

               (1 | PRO_COM_T),
             
             data = itanes_2013_vars[!itanes_2013_vars$metro,])

pol_efficacy_int_1.lmer.df <- 
  confint(pol_efficacy_int_1.lmer) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("var") %>%
  dplyr::mutate(model = "pol_efficacy_int_1.lmer") %>%
  dplyr::left_join(lme4::fixef(pol_efficacy_int_1.lmer) %>% 
                     as.data.frame(col.names = "est") %>%
                     tibble::rownames_to_column("var") %>%
                     dplyr::rename(est = '.'),
                   by = 'var')

pol_efficacy_int_1a.lmer <- 
  
  lme4::lmer(formula = pol_efficacy_int   ~ 
            
               age +
               
               sex +
               
               education + 

               scale(sqrt(rsvp_sum_2007_2012_perc)) + 
               
               pol_interest + 
               
               social_capital_bin + 
               
               pol_part +
               
               municipality_size + 

               (1 | PRO_COM_T),
             
             data = itanes_2013_vars[!itanes_2013_vars$metro,])

pol_efficacy_int_1a.lmer.df <- 
  confint(pol_efficacy_int_1a.lmer) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("var") %>%
  dplyr::mutate(model = "pol_efficacy_int_1a.lmer") %>%
  dplyr::left_join(lme4::fixef(pol_efficacy_int_1a.lmer) %>% 
                     as.data.frame(col.names = "est") %>%
                     tibble::rownames_to_column("var") %>%
                     dplyr::rename(est = '.'),
                   by = 'var')

pol_efficacy_int_1b.lmer <- 
  
  lme4::lmer(formula = pol_efficacy_int   ~ 
            
               age +
               
               sex +
               
               education + 

               scale(sqrt(rsvp_sum_2012_2013_perc)) + 
               
               pol_interest + 
               
               social_capital_bin + 
               
               pol_part +
               
               municipality_size + 

               (1 | PRO_COM_T),
             
             data = itanes_2013_vars[!itanes_2013_vars$metro,])

pol_efficacy_int_1b.lmer.df <- 
  confint(pol_efficacy_int_1b.lmer) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("var") %>%
  dplyr::mutate(model = "pol_efficacy_int_1b.lmer") %>%
  dplyr::left_join(lme4::fixef(pol_efficacy_int_1b.lmer) %>% 
                     as.data.frame(col.names = "est") %>%
                     tibble::rownames_to_column("var") %>%
                     dplyr::rename(est = '.'),
                   by = 'var')

pol_efficacy_int_2.lmer <- 
  
  lme4::lmer(formula = pol_efficacy_int   ~ 
            
               age +
               
               sex +
               
               education + 

               rsvp_sum_diff + 
               
               pol_interest + 
               
               social_capital_bin + 
               
               pol_part +
               
               municipality_size + 

               (1 | PRO_COM_T),
             
             data = itanes_2013_vars)

pol_efficacy_int_2.lmer.df <- 
  confint(pol_efficacy_int_2.lmer) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("var") %>%
  dplyr::mutate(model = "pol_efficacy_int_2.lmer") %>%
  dplyr::left_join(lme4::fixef(pol_efficacy_int_2.lmer) %>% 
                     as.data.frame(col.names = "est") %>%
                     tibble::rownames_to_column("var") %>%
                     dplyr::rename(est = '.'),
                   by = 'var')

pol_efficacy_int_2a.lmer <- 
  
  lme4::lmer(formula = pol_efficacy_int   ~ 
            
               age +
               
               sex +
               
               education + 

               scale(sqrt(rsvp_sum_2007_2012_perc)) +
               
               pol_interest + 
               
               social_capital_bin + 
               
               pol_part +
               
               municipality_size + 

               (1 | PRO_COM_T),
             
             data = itanes_2013_vars)

pol_efficacy_int_2a.lmer.df <- 
  confint(pol_efficacy_int_2a.lmer) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("var") %>%
  dplyr::mutate(model = "pol_efficacy_int_2a.lmer") %>%
  dplyr::left_join(lme4::fixef(pol_efficacy_int_2a.lmer) %>% 
                     as.data.frame(col.names = "est") %>%
                     tibble::rownames_to_column("var") %>%
                     dplyr::rename(est = '.'),
                   by = 'var')

pol_efficacy_int_2b.lmer <- 
  
  lme4::lmer(formula = pol_efficacy_int   ~ 
            
               age +
               
               sex +
               
               education + 

               scale(sqrt(rsvp_sum_2012_2013_perc)) +
               
               pol_interest + 
               
               social_capital_bin + 
               
               pol_part +
               
               municipality_size + 

               (1 | PRO_COM_T),
             
             data = itanes_2013_vars)

pol_efficacy_int_2b.lmer.df <- 
  confint(pol_efficacy_int_2b.lmer) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("var") %>%
  dplyr::mutate(model = "pol_efficacy_int_2b.lmer") %>%
  dplyr::left_join(lme4::fixef(pol_efficacy_int_2b.lmer) %>% 
                     as.data.frame(col.names = "est") %>%
                     tibble::rownames_to_column("var") %>%
                     dplyr::rename(est = '.'),
                   by = 'var')

pol_efficacy_int_3.lm <- 
  
  lm(formula = pol_efficacy_int   ~ 
            
               age +
               
               sex +
               
               education + 

               rsvp_sum_diff + 
               
               pol_interest + 
               
               social_capital_bin + 
               
               pol_part +
               
               municipality_size,
             
             data = itanes_2013_vars[!itanes_2013_vars$metro,])

pol_efficacy_int_3.lm.df <- 
  confint(pol_efficacy_int_3.lm) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("var") %>%
  dplyr::mutate(model = "pol_efficacy_int_3.lm") %>%
  dplyr::left_join(pol_efficacy_int_3.lm$coefficients %>% 
                     as.data.frame(col.names = "est") %>%
                     tibble::rownames_to_column("var") %>%
                     dplyr::rename(est = '.'),
                   by = 'var')

pol_efficacy_int_4.lm <- 
  
  lm(formula = pol_efficacy_int   ~ 
            
               age +
               
               sex +
               
               education + 

               rsvp_sum_diff + 
               
               pol_interest + 
               
               social_capital_bin + 
               
               pol_part +
               
               municipality_size,
             
             data = itanes_2013_vars)

pol_efficacy_int_4.lm.df <- 
  confint(pol_efficacy_int_4.lm) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("var") %>%
  dplyr::mutate(model = "pol_efficacy_int_4.lm") %>%
  dplyr::left_join(pol_efficacy_int_4.lm$coefficients %>% 
                     as.data.frame(col.names = "est") %>%
                     tibble::rownames_to_column("var") %>%
                     dplyr::rename(est = '.'),
                   by = 'var')

voted_m5s_2013.glm_2 <- 
  
  glm(voted_M5S_2013 ~ 
        
        age +
        
        sex +
        
        education + 
        
        pol_efficacy_int *
        
        discontent + 

        pol_interest + 
        
        social_capital_bin +
        
        pol_part + 
        
        municipality_size,
      
      data = itanes_2013_vars,
      
      family = 'binomial')

voted_m5s_2013.glm_2.df <- 
  confint(voted_m5s_2013.glm_2) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("var") %>%
  dplyr::mutate(model = "voted_m5s_2013.glm_2") %>%
  dplyr::left_join(voted_m5s_2013.glm_2$coefficients %>% 
                     as.data.frame(col.names = "est") %>%
                     tibble::rownames_to_column("var") %>%
                     dplyr::rename(est = '.'),
                   by = 'var')

```


# Figures

```{r eval = T}

# Run this to read from cache

knitr_cache_path = 'main/latex/'

capture.output(
  knitr::load_cache(label = "meetup-spatial-analysis-model-1",
                    object = 'beta_var_1.samples_2007_2012.m',
                    path = knitr_cache_path),
  file='NUL')

capture.output(
  knitr::load_cache(label = "meetup-spatial-analysis-model-2",
                    object = 'beta_var_2.samples_2007_2012.m',
                    path = knitr_cache_path),
  file='NUL')

capture.output(
  knitr::load_cache(label = "meetup-spatial-analysis-model-5",
                    object = 'beta_var_6.samples_2007_2012.m',
                    path = knitr_cache_path),
  file='NUL')

capture.output(
  knitr::load_cache(label = "meetup-spatial-analysis-model-5",
                    object = 'beta_var_7.samples_2007_2012.m',
                    path = knitr_cache_path),
  file='NUL')
```


```{r}
map(c('figure/carbayes-predicting-rsvp.pdf', 'figure/carbayes-predicting-rsvp.png'), 
    ~ ggsave(.x, width = 8, heigh =9,
       rbind(
         round(t(rbind(apply(beta_var_1.samples_2007_2012.m, 2, mean), 
                       apply(beta_var_1.samples_2007_2012.m, 2, quantile, 
                             c(0.025, 0.975)))), 5) %>%
           as.data.frame() %>%
           tibble::rownames_to_column(var = "var") %>%
           dplyr::filter(!grepl("Intercept|P1_cut", var)) %>%
           dplyr::mutate(model = "predicting Meetup RVSPs Period 1 (%, sqrt): mod. 1"),
         round(t(rbind(apply(beta_var_2.samples_2007_2012.m, 2, mean), 
                       apply(beta_var_2.samples_2007_2012.m, 2, quantile, 
                             c(0.025, 0.975)))), 5) %>%
           as.data.frame() %>%
           tibble::rownames_to_column(var = "var") %>%
           dplyr::filter(!grepl("Intercept|P1_cut", var)) %>%
           dplyr::mutate(model = "predicting Meetup RVSPs Period 1 (%, sqrt): mod. 2"),
         
         round(t(rbind(apply(beta_var_1.samples_2012_2013.m, 2, mean), 
                       apply(beta_var_1.samples_2012_2013.m, 2, quantile, 
                             c(0.025, 0.975)))), 5) %>%
           as.data.frame() %>%
           tibble::rownames_to_column(var = "var") %>%
           dplyr::filter(!grepl("Intercept|P1_cut", var)) %>%
           dplyr::mutate(model = "predicting Meetup RVSPs Period 2 (%, sqrt): mod. 1"),
         round(t(rbind(apply(beta_var_2.samples_2012_2013.m, 2, mean), 
                       apply(beta_var_2.samples_2012_2013.m, 2, quantile, 
                             c(0.025, 0.975)))), 5) %>%
           as.data.frame() %>%
           tibble::rownames_to_column(var = "var") %>%
           dplyr::filter(!grepl("Intercept|P1_cut", var)) %>%
           dplyr::mutate(model = "predicting Meetup RVSPs Period 2 (%, sqrt): mod. 2"),
         
         round(t(rbind(apply(beta_var_6.samples_diff.m, 2, mean), 
                       apply(beta_var_6.samples_diff.m, 2, quantile, 
                             c(0.025, 0.975)))), 5) %>%
         as.data.frame() %>%
         tibble::rownames_to_column(var = "var") %>%
         dplyr::filter(!grepl("Intercept|P1_cut", var)) %>%
         dplyr::mutate(model = "predicting Meetup RVSPs Period 2 - Period 1 (%, sqrt): mod. 1"),
       round(t(rbind(apply(beta_var_7.samples_diff.m, 2, mean), 
                     apply(beta_var_7.samples_diff.m, 2, quantile, 
                           c(0.025, 0.975)))), 5) %>%
         as.data.frame() %>%
         tibble::rownames_to_column(var = "var") %>%
         dplyr::filter(!grepl("Intercept|P1_cut", var)) %>%
         dplyr::mutate(model = "predicting Meetup RVSPs Period 2 - Period 1 (%, sqrt): mod. 2")) %>%
         dplyr::mutate(var = case_when(var == "turnout_2006_08" ~
                                         "turnout 2006-08 (mean %)",
                                       var == "sqrt(NP_949_perc)" ~
                                         "staff not-for-profit ass. (%, sqrt)",
                                       var == "P1_cut_6" ~ 
                                         "P1_cut_6",
                                       var =="P47_perc" ~ 
                                         "pop. with degree (%)",
                                       var =="P62_perc" ~ 
                                         "pop. unemployed (%)",
                                       var == "P130_perc" ~ 
                                         "pop. homemakers (%)",
                                       var == "over65_perc" ~ 
                                         "pop. over 65 (%)",
                                       var %in% c("dd_2007_2012_perc",
                                                  "dd_2013_perc") ~ 
                                         "hh with Internet (%)",
                                       )) %>%
         ggplot(aes(y = var)) +
         geom_point(aes(x = V1)) +
         geom_errorbar(aes(xmin = `2.5%`, xmax = `97.5%`)) +
         geom_vline(xintercept = 0, alpha = .5) +
         theme_bw() + 
         labs(y = 'predictor', 
              x = "posterior means and 95% credible intervals") +
         facet_wrap(~factor(model, 
                            levels=c("predicting Meetup RVSPs Period 1 (%, sqrt): mod. 1",
                                     "predicting Meetup RVSPs Period 1 (%, sqrt): mod. 2",
                                     "predicting Meetup RVSPs Period 2 (%, sqrt): mod. 1",
                                     "predicting Meetup RVSPs Period 2 (%, sqrt): mod. 2",
                                     "predicting Meetup RVSPs Period 2 - Period 1 (%, sqrt): mod. 1",
                                     "predicting Meetup RVSPs Period 2 - Period 1 (%, sqrt): mod. 2")), 
       ncol = 1, scale = "free_y")))
```

```{r eval = T}

capture.output(
  knitr::load_cache(label = "national-turnout-2013",
                    object = 'hex_italy_eff_voters_2013.df',
                    path = knitr_cache_path),
  file='NUL')

capture.output(
  knitr::load_cache(label = "meetup-spatial-analysis-model-4",
                    object = 'rolling_posts.df',
                    path = knitr_cache_path),
  file='NUL')


```

```{r}

require(DBI)
require(RSQLite)

con <- DBI::dbConnect(RSQLite::SQLite(), 
                      "data/corriere-repubblica/rep_cor_hits_1992_2015.sqlite")

corriere_weights.df <- 
  dbReadTable(con, "corriere_PresCon") %>%
  dplyr::mutate(table = "PresCon") %>%
  dplyr::bind_rows(
    dbReadTable(con, "corriere_SegOpp") %>%
      dplyr::mutate(table = "SegOpp")) %>%
  dplyr::bind_rows(
    dbReadTable(con, "corriere_politica") %>%
      dplyr::mutate(table = "politica")) %>%
  dplyr::bind_rows(
    dbReadTable(con, "corriere_beppe") %>%
      dplyr::mutate(table = "beppe")) %>%
  tidyr::pivot_wider(id_cols = c('from_date', 'to_date'),
                     names_from = 'table',
                     values_from = 'hits')

repubblica_weights.df <- 
  dbReadTable(con, "repubblica_PresCon") %>%
  dplyr::mutate(table = "PresCon") %>%
  dplyr::bind_rows(
    dbReadTable(con, "repubblica_SegOpp") %>%
      dplyr::mutate(table = "SegOpp")) %>%
  dplyr::bind_rows(
    dbReadTable(con, "repubblica_politica") %>%
      dplyr::mutate(table = "politica")) %>%
  dplyr::bind_rows(
    dbReadTable(con, "repubblica_beppe") %>%
      dplyr::mutate(table = "beppe")) %>%
  tidyr::pivot_wider(id_cols = c('from_date', 'to_date'),
                     names_from = 'table',
                     values_from = 'hits')

correp_weights.df <- 
  corriere_weights.df %>%
  dplyr::left_join(repubblica_weights.df, 
                   by = "from_date") %>%
  dplyr::mutate(beppe_perc.x = (beppe.x / politica.x),
                beppe_perc.y = (beppe.y / politica.y),
                PresCon_perc.x = (PresCon.x / politica.x),
                PresCon_perc.y = (PresCon.y / politica.y)) %>%
  dplyr::rowwise() %>% 
  dplyr::mutate(beppe_mean_perc = mean(c(beppe_perc.x, 
                                         beppe_perc.y)),
                PresCon_mean_perc = mean(c(PresCon_perc.x, 
                                           PresCon_perc.y))) %>%
  dplyr::ungroup()

map(c('figure/timeseries-plots.pdf', 'figure/timeseries-plots.png'), 
    ~ ggsave(.x, width = 12, height = 6,
       cowplot::plot_grid(
         meetup_rsvp_count_week.df %>%
           ggplot(aes(x = as.Date(week), y = sum_yes_rsvp_count)) +
           annotate("rect", ymin=0, ymax=15500, 
                    xmin = break_dates[1], 
                    xmax = break_dates[2],
                    colour = "red",
                    alpha = .2) +
           annotate("rect", ymin=0, ymax=15500, 
                    xmin = break_dates[2], 
                    xmax = break_dates[3],
                    colour = "blue",
                    alpha = .2) +
           annotate("text", x = c(as.Date("2010-01-01"), as.Date("2012-10-01")), 
                    y = 12500, label = c("Period\n2007-12", 
                                         "Period\n2012-13")) +
           geom_histogram(stat ='identity') +
           geom_vline(xintercept = break_dates, alpha = .35) +
           coord_cartesian(xlim = c(as.Date("2005-01-01"),
                                    as.Date("2015-01-01"))) +
           theme_bw() + labs(x = "meetup RSVPs by week", y = NULL),
         correp_weights.df %>%
           dplyr::ungroup() %>%
           dplyr::arrange(from_date) %>%
           # dplyr::mutate(beppe_mean_perc = zoo::rollmean(beppe_mean_perc, k = 3, align = "left", fill = NA, na.rm = T),
           #               PresCon_mean_perc = zoo::rollmean(PresCon_mean_perc, k = 3, align = "left", fill = NA, na.rm = T)) %>%
           ggplot(aes(x = as.Date(from_date), y = beppe_mean_perc / PresCon_mean_perc)) +
           geom_histogram(stat ='identity') +
           geom_vline(xintercept = break_dates, alpha = .35) +
           coord_cartesian(xlim = c(as.Date("2005-01-01"),
                                    as.Date("2015-01-01"))) +
           theme_bw() + labs(y = NULL, x = "mention of 'Beppe Grillo' on Corriere della Sera and la Repubblica (as proportion of mentions of the Prime Minister)"),
         rolling_posts.df %>%
           tibble::rownames_to_column("var") %>%
           dplyr::mutate(var = gsub("[\\.]+(.*)$", "", var),
                         week = as.Date(week)) %>%
           dplyr::filter(grepl("NP_949_perc", var)) %>%
           dplyr::mutate(colorcode = case_when(`2.5%` > 0 ~ "pos. and significant",
                                               `2.5%` < 0 & `97.5%` > 0 ~ "not significant",
                                               `2.5%` < 0 & `97.5%` < 0 ~ "neg. and significant")) %>%
           ggplot(aes(x = week, y = V1)) +
           geom_point(aes(color = colorcode)) +
           scale_color_manual(values = c("orange", "blue")) +
           geom_errorbar(aes(ymin = `2.5%`, ymax = `97.5%`), alpha = .2) +
           geom_vline(xintercept = break_dates, alpha = .35) +
           coord_cartesian(xlim = c(as.Date("2005-01-01"),
                                    as.Date("2015-01-01"))) +
           theme_bw() +
           labs(y = NULL, x = "coefficient and confidence interval for staff of not-for-profit associations (%, sqrt)", color = NULL) +
           theme(legend.position = c(0.1, .8),
                 legend.background = element_blank()),
         ncol = 1, align = "v"
       )))
```

```{r eval = T}
capture.output(
  knitr::load_cache(label = "meetup-spatial-analysis-model-3",
                    object = 'beta_var_3.samples.m',
                    path = knitr_cache_path),
  file='NUL')

capture.output(
  knitr::load_cache(label = "meetup-spatial-analysis-model-3",
                    object = 'beta_var_4.samples.m',
                    path = knitr_cache_path),
  file='NUL')

capture.output(
  knitr::load_cache(label = "meetup-spatial-analysis-model-3",
                    object = 'beta_var_5.samples.m',
                    path = knitr_cache_path),
  file='NUL')
```


```{r}

map(c('figure/carbayes-predicting-m5s2013.pdf', 'figure/carbayes-predicting-m5s2013.png'), 
    ~ ggsave(.x, width = 10, height = 5,
       
       
       rbind(
         round(t(rbind(apply(beta_var_3.samples.m, 2, mean), 
                       apply(beta_var_3.samples.m, 2, quantile, 
                             c(0.025, 0.975)))), 5) %>%
           as.data.frame() %>%
           tibble::rownames_to_column(var = "var") %>%
           dplyr::filter(!grepl("Intercept|P1_cut", var)) %>%
           dplyr::mutate(model = "predicting M5S vote in 2013 (%): mod. 1"),
         
         round(t(rbind(apply(beta_var_4.samples.m, 2, mean), 
                       apply(beta_var_4.samples.m, 2, quantile, 
                             c(0.025, 0.975)))), 5) %>%
           as.data.frame() %>%
           tibble::rownames_to_column(var = "var") %>%
           dplyr::filter(!grepl("Intercept|P1_cut", var)) %>%
           dplyr::mutate(model = "predicting M5S vote in 2013 (%): mod. 2"),
         round(t(rbind(apply(beta_var_5.samples.m, 2, mean), 
                       apply(beta_var_5.samples.m, 2, quantile, 
                             c(0.025, 0.975)))), 5) %>%
           as.data.frame() %>%
           tibble::rownames_to_column(var = "var") %>%
           dplyr::filter(!grepl("Intercept|P1_cut", var)) %>%
           dplyr::mutate(model = "predicting M5S vote in 2013 (%): mod. 3")
       ) %>%
         dplyr::mutate(var = case_when(var == "rsvp_sum_diff" ~
                                         "Meetup RSVPs: Period 2 - Period 1 (sqrt)",
                                       var == "rsvp_sum_2007_2012_perc" ~
                                         "Meetup RSVPs: Period 1 (%, sqrt)",
                                       var == "rsvp_sum_2012_2013_perc" ~ 
                                         "Meetup RSVPs: Period 2 (%, sqrt)",
                                       var == "dd_2013_perc" ~
                                         "hh. with Internet (%)",
                                       var =="P47_perc" ~ 
                                         "pop. with degree (%)",
                                       var =="P62_perc" ~ 
                                         "pop. unemployed (%)",
                                       var == "P130_perc" ~ 
                                         "pop. homemakers (%)",
                                       var == "over65_perc" ~ 
                                         "pop. over 65 (%)")) %>%
         dplyr::mutate(var = factor(var, 
                                    levels = 
                                      rev(c("Meetup RSVPs: Period 1 (%, sqrt)",
                                        "Meetup RSVPs: Period 2 (%, sqrt)",
                                        "Meetup RSVPs: Period 2 - Period 1 (sqrt)",
                                        "hh. with Internet (%)",
                                        "pop. with degree (%)",
                                        "pop. unemployed (%)",
                                        "pop. homemakers (%)",
                                        "pop. over 65 (%)")),
                                    ordered = T)) %>%
         ggplot(aes(y = var)) +
         geom_point(aes(x = V1)) +
         geom_errorbar(aes(xmin = `2.5%`, xmax = `97.5%`)) +
         theme_bw() + 
         labs(y = 'predictor', 
              x = "posterior means and 95% credible intervals") +
         facet_wrap('model', ncol = 1)))
```

```{r eval = T}
capture.output(
  knitr::load_cache(label = "itanes-individual-level",
                    object = 'itanes_2013_vars',
                    path = knitr_cache_path),
  file='NUL')

```

```{r}
itanes_individual_models.df <- 
  voted_m5s_2013.glm_1.df %>%
  dplyr::bind_rows(discontent.lm_1.df) %>% 
  dplyr::bind_rows(discontent.lm_2.df) %>% 
  dplyr::bind_rows(pol_efficacy_int_1.lmer.df) %>% 
  dplyr::bind_rows(voted_m5s_2013.glm_2.df) %>% 
  dplyr::bind_rows(voted_m5s_2013.glm_2.df) %>%
  dplyr::filter(model %in% c("pol_efficacy_int_1.lmer", "voted_m5s_2013.glm_2")) %>%
  
  dplyr::filter(!grepl("Intercept|.sig", var)) %>%
  dplyr::mutate(var = factor(case_when(var == "rsvp_sum_diff" ~
                                         "Meetup RSVPs Period 2 - Period 1 (%, sqrt)",
                                       var =="education" ~
                                         "education",
                                       var == "fb_use" ~
                                         "Facebook use",
                                       var == "voted_M5S_2013TRUE" ~
                                         "voted M5S in 2013",
                                       var == "pol_interest" ~
                                         "political interest",
                                       var == "pol_efficacy_int" ~
                                         "freq. political talk",
                                       var == "pol_efficacy_ext" ~
                                         "ext. political efficacy",
                                       var == "municipality_size" ~
                                         "municipality size",
                                       var == "age" ~
                                         "age",
                                       var == "pol_efficacy_int:discontent" ~
                                         "freq. political talk X discontent",
                                       var == "sexFemale" ~
                                         "sex: Female",
                                       var == "social_capital_binTRUE" ~
                                         "member of association: TRUE",
                                       var == "pol_part" ~
                                         "political participation",
                                       var == "NOSAY" ~
                                         "NOSAY",
                                       var == "NOCARE" ~
                                         "NOCARE",
                                       var == "DISTANCE" ~
                                         "DISTANCE",
                                       var == "COMPLEXITY" ~
                                         "COMPLEXITY",
                                       var == "discontent" ~
                                         "discontent",
  )
  ,
  levels = rev(c("age",
                 "sex: Female",
                 "education",
                 "municipality size",
                 
                 
                 "discontent",
                 "ext. political efficacy",  
                 "NOSAY",
                 "NOCARE",
                 "COMPLEXITY",
                 "DISTANCE",
                 "freq. political talk",
                 "political interest",
                 
                 "member of association: TRUE",
                 
                 "political participation",
                 
                 "Meetup RSVPs Period 2 - Period 1 (%, sqrt)",
                 
                 "freq. political talk X discontent"))
  )
  )


facet_names <- as_labeller(
  c(# `discontent.lm_1` = "DV: discontent, LM", 
    # `discontent.lm_2` = "DV: discontent, LM 2", 
    `pol_efficacy_int_1.lmer` = "DV: freq. political talk, Random intercept fixed mean model", 
    # `voted_m5s_2013.glm_1` = "DV: M5S vote, BIN 1", 
    `voted_m5s_2013.glm_2` = "DV: M5S vote, logistic model"
    ))

map(c('figure/models-itanes.pdf', 'figure/models-itanes.png'), 
    ~ ggsave(.x, width = 12, height = 5,
       itanes_individual_models.df %>%
         ggplot(aes(y = var)) +
         geom_point(aes(x = est)) + 
         geom_errorbar(aes(xmin = `2.5 %`, xmax = `97.5 %`)) +
         geom_vline(xintercept = 0, alpha = .5) +
         theme_bw() + 
         labs(y = 'predictor', 
              x = "estimates and 95% confidence intervals") +
         facet_wrap('model', nrow = 1, scale = "free_x", labeller = facet_names)))


map(c('figure/single-model-itanes.pdf', 'figure/single-model-itanes.png'), 
    ~ ggsave(.x, width = 9, height = 4,
       itanes_individual_models.df %>%
         dplyr::filter(model == "pol_efficacy_int_1.lmer") %>%
         ggplot(aes(y = var)) +
         geom_point(aes(x = est)) + 
         geom_errorbar(aes(xmin = `2.5 %`, xmax = `97.5 %`)) +
         geom_vline(xintercept = 0, alpha = .5) +
         theme_bw() + 
         labs(y = 'predictor', 
              x = "estimates and 95% confidence intervals")))

```

```{r}

require(ggpubr)

map(c('figure/parties-discontent_type-bars.pdf', 'figure/parties-discontent_type-bars.png'), 
    ~ ggsave(.x, width = 8, height = 5,
             itanes_2013_vars %>%
               dplyr::mutate(discontent_type = 
                               factor(case_when(nat_pol_trust < 
                                                  median(itanes_2013_vars$nat_pol_trust, na.rm = T) &
                                                  social_capital_bin ~ "within legacy mobilisation networks",
                                                nat_pol_trust < 
                                                  median(itanes_2013_vars$nat_pol_trust, na.rm = T) &
                                                  !social_capital_bin ~ "outside legacy mobilisation networks",
                                                nat_pol_trust >= 
                                                  median(itanes_2013_vars$nat_pol_trust, na.rm = T) 
                                                ~ "not discontent",
                                                TRUE ~ "NA"),
                                      levels = c('within legacy mobilisation networks', 
                                                 'outside legacy mobilisation networks',
                                                 'not discontent', "NA"),
                                      ordered = T)) %>%
               dplyr::mutate(`vote in 2013` = factor(case_when(d90 %in% 5 ~ "M5S",
                                                               !no_vote ~ "other party",
                                                               no_vote ~ "no vote"),
                                                     levels = c("M5S", "other party", "no vote"))) %>%
               dplyr::filter(grepl("networks", discontent_type)) %>%
               ggerrorplot(x = "discontent_type", y = "pol_efficacy_int", color = "vote in 2013",
                           desc_stat = "mean_ci",
                           palette = "Set1",
                           error.plot = "errorbar", add = "mean", 
                           position = position_dodge(0.5)) +
               geom_hline(yintercept = 0, alpha = .5) +
               labs(x = NULL, y = "freq. political talk\n(mean and confidence interval)") +
               theme(legend.position = "bottom")))
```

